#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
import sys
from odf.opendocument import OpenDocument
from odf import element, grammar
from odf.namespaces import *
from odf.attrconverters import attrconverters, cnv_string

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO


# Attributes that are reported as known vendor/specification extensions
# rather than as plain errors.  The outer key is the label printed in the
# warning; each inner mapping goes from an element (namespace, name) pair
# to the tuple of attribute (namespace, name) pairs tolerated on it.
extension_attributes = {
    "OpenDocument 1.2": {
        (OFFICENS, u'document-content'): (
            (GRDDLNS, u'transformation'),
        ),
        (OFFICENS, u'document-meta'): (
            (GRDDLNS, u'transformation'),
        ),
        (OFFICENS, u'document-styles'): (
            (GRDDLNS, u'transformation'),
        ),
        (STYLENS, u'section-properties'): (
            (STYLENS, u'editable'),
        ),
        (MANIFESTNS, u'file-entry'): (
            (MANIFESTNS, u'version'),
        ),
        (TEXTNS, u'list'): (
            (TEXTNS, u'continue-list'),
            (XMLNS, u'id'),
        ),
    },
    "OpenOffice.org": {
        (METANS, u'template'): (
            (XLINKNS, u'role'),
        ),
        (STYLENS, u'graphic-properties'): (
            (STYLENS, u'background-transparency'),
        ),
        (STYLENS, u'paragraph-properties'): (
            (TEXTNS, u'enable-numbering'),
            (STYLENS, u'join-border'),
        ),
        (STYLENS, u'table-cell-properties'): (
            (STYLENS, u'writing-mode'),
        ),
        (STYLENS, u'table-row-properties'): (
            (STYLENS, u'keep-together'),
        ),
    },
    "KOffice": {
        (STYLENS, u'graphic-properties'): (
            (KOFFICENS, u'frame-behavior-on-new-page'),
        ),
        (DRAWNS, u'page'): (
            (KOFFICENS, u'name'),
        ),
        (PRESENTATIONNS, u'show-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (PRESENTATIONNS, u'hide-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (CHARTNS, u'legend'): (
            (KOFFICENS, u'title'),
        ),
    },
}

# Messages that have already been printed, in first-seen order.  Used by
# print_error() to suppress duplicates.
printed_errors = []


def print_error(str):
    """Print a diagnostic message on standard output, once per distinct text.

    ``str`` is the complete message.  The parameter name shadows the builtin
    but is kept so existing callers keep working.  A message that has been
    printed before is silently dropped.
    """
    if str not in printed_errors:
        printed_errors.append(str)
        # The parenthesised single-argument form prints the same thing as the
        # Python 2 print statement and is also valid as the Python 3 function.
        print(str)

def chop_arg(arg):
    """Return ``arg`` unchanged when it has at most 20 items, otherwise its
    first 20 items followed by '...'.
    """
    limit = 20
    if len(arg) <= limit:
        return arg
    return "%s..." % arg[:limit]

def make_qname(tag):
    """Format a (namespace, localname) pair as 'prefix:localname'.

    The namespace is looked up in ``nsdict``; when it has no entry there the
    namespace itself is used in place of the prefix.
    """
    namespace = tag[0]
    prefix = nsdict.get(namespace, namespace)
    return "%s:%s" % (prefix, tag[1])

def allowed_attributes(tag):
    """Return the ``grammar.allowed_attributes`` entry for ``tag``, as given
    by its ``get`` method.
    """
    table = grammar.allowed_attributes
    return table.get(tag)


class ODFElementHandler(handler.ContentHandler):
    """ SAX content handler that checks elements against the grammar tables.

    For every element it reports, through print_error():
      - attributes that are known extensions (as warnings),
      - attributes not listed as allowed for the element,
      - attribute values rejected by the attribute converters,
      - elements not allowed inside their parent element,
      - required attributes that are missing,
      - text inside elements that do not take text.
    """
    def __init__(self, document):
        handler.ContentHandler.__init__(self)
        self.doc = document
        # Tags of the elements that are currently open, outermost first.
        self.tagstack = []
        # Text chunks seen so far for the innermost open element.
        self.data = []
        # Text buffers of the enclosing elements, parked while a child is open.
        self._textstack = []
        # Tag of the most recently closed element.
        self.currtag = None

    def characters(self, data):
        """ Collect character data for the element that is currently open """
        self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        """ Validate the attributes and the position of an opening element

        ``tag`` is the (namespace, localname) pair of the element and
        ``attrs`` the namespace-aware attribute collection from the parser.
        ``qname`` is not used.
        """
        allowed_attrs = grammar.allowed_attributes.get(tag)
        tagname = make_qname(tag)
        for (att, value) in attrs.items():
            attname = make_qname(att)
            # Check if it is a known extension
            is_extension = False
            for product, ext_attrs in extension_attributes.items():
                allowed_ext_attrs = ext_attrs.get(tag)
                if allowed_ext_attrs and att in allowed_ext_attrs:
                    print_error("Warning: Attribute %s in element <%s> is illegal - %s extension" % (attname, tagname, product))
                    is_extension = True
            # Check if it is an allowed attribute.  Elements without an
            # entry in the table are not checked.
            if not is_extension and allowed_attrs and att not in allowed_attrs:
                print_error("Error: Attribute %s is not allowed in element <%s>" % (attname, tagname))
            # Check the value; the converters signal a bad value with ValueError
            try:
                convert = attrconverters.get(att, cnv_string)
                convert(att, value, tag)
            except ValueError as res:
                print_error("Error: Bad value '%s' for attribute %s in  tag: <%s> - %s" %
                    (chop_arg(value), attname, tagname, res))

        self.tagstack.append(tag)
        # Park the parent's text so that text preceding this child is still
        # there when the parent is closed, and start a fresh buffer.
        self._textstack.append(self.data)
        self.data = []
        # Check that the parent allows this child element
        if tag not in ( (OFFICENS, 'document'), (OFFICENS, 'document-content'), (OFFICENS, 'document-styles'),
             (OFFICENS, 'document-meta'), (OFFICENS, 'document-settings'),
             (MANIFESTNS,'manifest')):
            if len(self.tagstack) < 2:
                # No parent: the file starts with something that is not one
                # of the root elements listed above.
                print_error("Error: This document starts with the wrong tag: <%s>" % tagname)
            else:
                parent = self.tagstack[-2]
                allowed_children = grammar.allowed_children.get(parent)
                if allowed_children and tag not in allowed_children:
                    print_error("Error: Element %s is not allowed in element %s" % (tagname, make_qname(parent)))
        # Test that all mandatory attributes have been added.
        required = grammar.required_attributes.get(tag)
        if required:
            for r in required:
                if attrs.get(r) is None:
                    print_error("Error: Required attribute missing: %s in <%s>" % (make_qname(r), tagname))

    def endElementNS(self, tag, qname):
        """ Check the collected text of the element that is being closed """
        self.currtag = self.tagstack.pop()
        text = ''.join(self.data).strip()
        # Check that only elements that can take text have text
        # But only elements we know exist in grammar
        if tag in grammar.allowed_children:
            if text != '' and tag not in grammar.allows_text:
                print_error("Error: %s does not allow text data" % make_qname(tag))
        # Continue collecting text for the parent element.
        if self._textstack:
            self.data = self._textstack.pop()
        else:
            self.data = []

class ODFDTDHandler(handler.DTDHandler):
    """ DTD handler that turns notation declarations into a warning """

    def notationDecl(self, name, public_id, system_id):
        """ Report the declaration as a warning; its arguments are ignored """
        message = "Warning: ODF doesn't use DOCTYPEs"
        print_error(message)

def exitwithusage(exitcode=2):
    """ Write the usage text to stderr and exit with ``exitcode`` """
    usage_lines = (
        "Usage: %s inputfile\n" % sys.argv[0],
        "\tInputfile must be OpenDocument format\n",
    )
    for line in usage_lines:
        sys.stderr.write(line)
    sys.exit(exitcode)

def lint(odffile):
    """ Check an OpenDocument package and report problems via print_error()

    ``odffile`` is passed to zipfile, so it is a file name or file-like
    object.  The function checks that the input is a zip archive and that
    the first member is an uncompressed 'mimetype' without a comment, then
    runs the manifest, content, meta, styles and settings members through
    a namespace-aware SAX parser with ODFElementHandler attached.

    Nothing is returned.  XML parse errors are not caught here; they are
    raised by the parser's default error handler.
    """
    # Imported locally so that the byte stream is a real bytes buffer on
    # every Python version that has the io module.
    from io import BytesIO

    if not zipfile.is_zipfile(odffile):
        print_error("Error: This is not a zipped file")
        return
    zfd = zipfile.ZipFile(odffile)
    try:
        try:
            mimetype = zfd.read('mimetype')
        except (KeyError, zipfile.BadZipfile):
            # No 'mimetype' member, or it cannot be read: go on without one.
            mimetype = ''
        d = OpenDocument(mimetype)
        xml_members = ('META-INF/manifest.xml', 'content.xml', 'meta.xml',
                       'styles.xml', 'settings.xml')
        for position, zi in enumerate(zfd.infolist()):
            if position == 0:
                if zi.filename == 'mimetype':
                    if zi.compress_type != zipfile.ZIP_STORED:
                        print_error("Error: The 'mimetype' member must be stored - not deflated")
                    # Truthiness works for both an empty str and empty bytes;
                    # comparing with "" is always unequal when it is bytes.
                    if zi.comment:
                        print_error("Error: The 'mimetype' member must not have extra header info")
                else:
                    print_error("Warning: The first member in the archive should be the mimetype")
            if zi.filename in xml_members:
                content = zfd.read(zi.filename)
                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, True)
                # Do not let the parser fetch external general entities.
                parser.setFeature(handler.feature_external_ges, False)
                parser.setContentHandler(ODFElementHandler(d))
                dtdh = ODFDTDHandler()
                parser.setDTDHandler(dtdh)
                parser.setErrorHandler(handler.ErrorHandler())

                inpsrc = InputSource()
                inpsrc.setByteStream(BytesIO(content))
                parser.parse(inpsrc)
    finally:
        # Release the archive even when parsing raises.
        zfd.close()


# Script entry: exactly one command line argument, the file to check.
arguments = sys.argv[1:]
if len(arguments) != 1:
    exitwithusage()
lint(arguments[0])

